
******************************************************************************
************* INPUT POWER PLANT DATA FROM BFE MAP ****************************
******************************************************************************
clear all
frame create PV
frame change PV

import delimited "$root/Data/Original/ElectricityProductionPlant.csv", encoding(UTF-8) clear

encode maincategory, g(plant_cat1)
label define main_cat 1 "Hydro" 2 "Renewable" 3 "Nuclear" 4 "Fossil" 
label values plant_cat1 main_cat

encode subcategory, g(plant_cat2)
tab subcategory plant_cat2
label define sub_cat  1 "Hydro" 2  "Waste" 3 "PV" 4 "Wind" 5 "Biomass"  6 "Geothermal" 7 "Nuclear" 8 "Oil" 9 "Gas" 10 "Coal" 
label values plant_cat2 sub_cat

encode plantcategory, g(plant_cat3)
tab plantcategory plant_cat3, nolabel
label define sub_cat2  1 "Wastewater power" 2 "freestanding" 3 "Biomass" 4 "Waste incineration" 5 "Wastewater treatment" 6 "Diversion" 7 "Weir" 8 "Continuous"  9 "Drinking water" 10 "pumped storage" 11 "storage" 12 "attached" 9 "integrated" 13 "freestanding" 
label values plant_cat3 sub_cat2

drop   maincategory subcategory plantcategory

frame create PV_detail
frame change PV_detail
import delimited "$root/Data/Original/PlantDetail.csv", encoding(UTF-8) clear

rename date detail_date
rename power detail_power 

drop xtf_id inclination orientation plantcategory

rename electricityproductionplantr xtf_id

*Extract the date information (both monthly and yearly)
replace detail_date=subinstr(detail_date,"-","",.)
gen td=date(detail_date,"YMD")
format td  %td
gen tm=mofd(td)
format tm %tm
gen ty=yofd(td)

frlink m:1 xtf_id, frame(PV)
frget _all, from(PV)

*Check what kind of information is part of the data: 
tab plant_cat2

*Collapse the data to have a yearly average information: 
gcollapse (mean) detail_power, by(xtf_id ty)

reshape wide detail_power, i(xtf_id) j(ty)

frame change PV
frlink 1:1 xtf_id , frame(PV_detail)
frget _all, from(PV_detail)
drop PV_detail
frame drop PV_detail


*Gen PLZ variable
tostring postcode, gen(PLZ)

*normalise address variable for potential merge if merge via coordinates does not work: 
foreach var in  address {
	replace `var' = lower(`var')
	replace `var' = subinstr(`var', "ä", "ae", .)
	replace `var' = subinstr(`var', "ö", "oe", .)
	replace `var' = subinstr(`var', "ü", "ue", .)
	replace `var' = subinstr(`var', "ê", "e", .)
	replace `var' = subinstr(`var', "é", "e", .)
	replace `var' = subinstr(`var', "è", "e", .)
	replace `var' = subinstr(`var', "ì", "i", .)
	replace `var' = subinstr(`var', "í", "i", .)
	replace `var' = subinstr(`var', "î", "i", .)
	replace `var' = subinstr(`var', "ô", "o", .)
	replace `var' = subinstr(`var', "ç", "c", .)
	replace `var' = trim(`var')
}

rename address street
gen adress=street+ " " + PLZ
drop PLZ
rename postcode PLZ
*Create coordinates: 
tostring _x, gen(x)
tostring _y, gen(y)
gen GKODX=substr(x,2,6)
gen GKODY=substr(y,2,6)

drop x y

destring GKODX, force replace
destring GKODY, force replace

*Bring the installation date into format and also extract the year: 
gen date_install=date(beginningofoperation,"YMD")
format date_install  %td
gen year_install=yofd(date_install)

order plant_cat1 plant_cat2 year_install, after(xtf_id)

*Only keep the PVs
keep if plant_cat2==3

save "$root/Data/Original/powerplants.dta", replace
